# Zhou-Horiuchi, Study 2

# Initial settings --------------------------------------------------------

dir.create("output/study2", recursive = TRUE, showWarnings = FALSE)
dir.create("figures/study2", recursive = TRUE, showWarnings = FALSE)
dir.create("logs", recursive = TRUE, showWarnings = FALSE)

source("functions/logging.R")
start_script_log("study2")

options(tidyverse.quiet = TRUE)
library(tidyverse)
library(estimatr)
library(ggthemes)
library(patchwork)
library(mirt)

# TracePlot ---------------------------------------------------------------

# I could not install and load the ggmirt package in the following way:
# install.packages("devtools")
# devtools::install_github("masurp/ggmirt")
# library(ggmirt)

# Thus, I manually saved the function from masurp's GitHub:
# https://github.com/masurp/ggmirt/blob/main/R/tracePlot.r
source("functions/tracePlot.r")

# Read and wrangle data ---------------------------------------------------

source("functions/read_Qualtrics.R")
source("functions/reshape_conjoint.R")
source("functions/wrangle_data.R")

out <- wrangle_data("study2")
cat("\n------------------------------------------------------------\n")
cat("Saved data: output/study2/data_for_analysis.rds\n")
cat("------------------------------------------------------------\n\n")

# Test pre-registered hypotheses ------------------------------------------

source("functions/test_hypotheses.R")
test_hypotheses(out[[2]])[[1]]
ggsave("figures/study2/pre-registered_hypotheses.pdf", width = 8, height = 4)
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/pre-registered_hypotheses.pdf\n")
cat("------------------------------------------------------------\n\n")

# Visualize profile-level marginal means ----------------------------------

# Note: This is not the main analysis we pre-registered. Here, we calculate
# the marginal means using the standard approach (the unit of analysis is
# each profile). But our pre-registered analysis use each profile pair as
# the unit of analysis.

source("functions/visualize_MMs.R")
visualize_MMs(out[[1]])
ggsave("figures/study2/profile_level_MMs.pdf", width = 8, height = 6)
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/profile_level_MMs.pdf\n")
cat("------------------------------------------------------------\n\n")

# IRT ---------------------------------------------------------------------

source("functions/expand_profile_data.R")
irt_input <- out[[3]] %>%
  select(starts_with("contact")) %>%
  mutate(
    contact1 = score_agree_high(contact1),
    contact2 = score_agree_low(contact2),
    contact3 = score_agree_high(contact3),
    contact4 = score_agree_low(contact4)
  )

fitGraded <- mirt(
  data = irt_input,
  model = 1, # alternatively, we could also just specify model = 1 in this case
  itemtype = "graded",
)

# Testing fit of model
M2(fitGraded, type = "C2", calcNULL = FALSE)
itemfit(fitGraded)
head(personfit(fitGraded))

score <- fscores(fitGraded)
pdf("figures/study2/irt_traceplot.pdf", width = 8, height = 6)
tracePlot(fitGraded)
dev.off()
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/irt_traceplot.pdf\n")
cat("------------------------------------------------------------\n\n")

irt_data <- out[[3]] %>%
  mutate(
    irt_score = score,
    agg_score = ((score_agree_high(contact1) +
      score_agree_low(contact2) +
      score_agree_high(contact3) +
      score_agree_low(contact4)) / 4),
    irt_tercile = make_terciles(irt_score),
    agg_tercile = make_terciles(agg_score)
  )

# Explore more ------------------------------------------------------------

source("functions/make_figure.R")

# Make respondent-level data

profile_pairs_expanded <- expand_profile_data(out[[2]], out[[3]])

d_contact <- profile_pairs_expanded %>%
  left_join(irt_data %>% select(ResponseId, irt_score, agg_score, irt_tercile, agg_tercile),
    by = join_by(ResponseId)
  ) %>%
  mutate(
    gender = factor(Q9.4, levels = c("Male", "Female")),
    education = case_when(
      Q9.5 == "Universities and Graduate Schools" ~ "High",
      TRUE ~ "Low"
    ),
    age = Q9.2
  ) |>
  select(ResponseId, irt_score, irt_tercile, agg_tercile, gender, education, age, political_affiliation) |>
  distinct()

# Run a model to predict contact

model_contact <- lm(irt_score ~ gender + education + age + I(age^2) + factor(political_affiliation), data = d_contact)

d_resid <- d_contact |>
  mutate(
    residual = residuals(model_contact),
    residual_tercile = ntile(residual, 3),
    residual_tercile = factor(residual_tercile,
      levels = 1:3,
      labels = c("Low", "Medium", "High")
    )
  )

# Add residuals

profile_pairs_expanded_updated <- profile_pairs_expanded |>
  left_join(d_resid |> select(ResponseId, residual_tercile, agg_tercile, irt_tercile),
    by = join_by(ResponseId)
  )

# Save figures

make_figure(profile_pairs_expanded_updated, quo(agg_tercile), "Contact")
ggsave("figures/study2/contact_agg_MMs.pdf", width = 8, height = 6)
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/contact_agg_MMs.pdf\n")
cat("------------------------------------------------------------\n\n")

make_figure(profile_pairs_expanded_updated, quo(irt_tercile), "Contact Based on IRT")
ggsave("figures/study2/contact_irt_MMs.pdf", width = 8, height = 6)
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/contact_irt_MMs.pdf\n")
cat("------------------------------------------------------------\n\n")

ggplot(irt_data, aes(x = agg_score, y = irt_score)) +
  geom_point() +
  geom_smooth(method = "lm_robust", se = FALSE, color = "red") +
  ggtitle("Correlation Between Aggregated Score and Score Based on IRT") +
  xlab("Aggregate Contact Score") +
  ylab("Contact Score Based on IRT") +
  theme_few()
ggsave("figures/study2/agg_irt_corr.pdf", width = 8, height = 6)
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/agg_irt_corr.pdf\n")
cat("------------------------------------------------------------\n\n")

make_figure(profile_pairs_expanded_updated, quo(residual_tercile), "Contact (Resid.) Based on IRT")
ggsave("figures/study2/contact_irtResidual_MMs.pdf", width = 8, height = 6)
cat("\n------------------------------------------------------------\n")
cat("Saved figure: figures/study2/contact_irtResidual_MMs.pdf\n")
cat("------------------------------------------------------------\n\n")


end_script_log()
